{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Guardar y deployar un modelo predictivo\n",
    "\n",
    "Para este ejemplo utilizaremos un árbol de decisión"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import os\n",
    "import pandas as pd\n",
    "from xgboost.sklearn import XGBClassifier\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score, confusion_matrix, precision_score, recall_score, f1_score"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>Pregnancies</th>\n",
       "      <th>Glucose</th>\n",
       "      <th>BloodPressure</th>\n",
       "      <th>SkinThickness</th>\n",
       "      <th>Insulin</th>\n",
       "      <th>BMI</th>\n",
       "      <th>DiabetesPedigreeFunction</th>\n",
       "      <th>Age</th>\n",
       "      <th>Outcome</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>6</td>\n",
       "      <td>148</td>\n",
       "      <td>72</td>\n",
       "      <td>35</td>\n",
       "      <td>0</td>\n",
       "      <td>33.6</td>\n",
       "      <td>0.627</td>\n",
       "      <td>50</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>85</td>\n",
       "      <td>66</td>\n",
       "      <td>29</td>\n",
       "      <td>0</td>\n",
       "      <td>26.6</td>\n",
       "      <td>0.351</td>\n",
       "      <td>31</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>8</td>\n",
       "      <td>183</td>\n",
       "      <td>64</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>23.3</td>\n",
       "      <td>0.672</td>\n",
       "      <td>32</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>89</td>\n",
       "      <td>66</td>\n",
       "      <td>23</td>\n",
       "      <td>94</td>\n",
       "      <td>28.1</td>\n",
       "      <td>0.167</td>\n",
       "      <td>21</td>\n",
       "      <td>0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>0</td>\n",
       "      <td>137</td>\n",
       "      <td>40</td>\n",
       "      <td>35</td>\n",
       "      <td>168</td>\n",
       "      <td>43.1</td>\n",
       "      <td>2.288</td>\n",
       "      <td>33</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   Pregnancies  Glucose  BloodPressure  SkinThickness  Insulin   BMI  \\\n",
       "0            6      148             72             35        0  33.6   \n",
       "1            1       85             66             29        0  26.6   \n",
       "2            8      183             64              0        0  23.3   \n",
       "3            1       89             66             23       94  28.1   \n",
       "4            0      137             40             35      168  43.1   \n",
       "\n",
       "   DiabetesPedigreeFunction  Age  Outcome  \n",
       "0                     0.627   50        1  \n",
       "1                     0.351   31        0  \n",
       "2                     0.672   32        1  \n",
       "3                     0.167   21        0  \n",
       "4                     2.288   33        1  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(os.path.join('../Datasets/diabetes.csv'))\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "feature_cols = ['Pregnancies', 'Insulin', 'BMI', 'Age','Glucose','BloodPressure','DiabetesPedigreeFunction']\n",
    "X = df[feature_cols]\n",
    "Y = df['Outcome']\n",
    "X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=1) # 70% training, 30% test"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Optimización de parámetros"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "#https://github.com/conda-forge/hyperopt-feedstock\n",
    "from hyperopt import fmin, tpe, hp, STATUS_OK,Trials"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "space ={\n",
    "    'n_estimators':hp.quniform('n_estimators',100,1000,1),  \n",
    "    'learning_rate':hp.quniform('learning_rate',0.025,0.5,0.025),\n",
    "    'max_depth':hp.quniform('max_depth',1,13,1),\n",
    "    'subsample': hp.quniform('subsample',0.5,1,0.05),\n",
    "    'colsample_bytree':hp.quniform('colsample_bytree',0.5,1,0.05),\n",
    "    'nthread':6,\n",
    "    'silent':1\n",
    "}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "def objective(params):\n",
    "    params['n_estimators'] = int(params['n_estimators'])\n",
    "    params['max_depth'] = int(params['max_depth'])  \n",
    "    classifier = XGBClassifier(**params)\n",
    "    classifier.fit(X_train,Y_train)   \n",
    "    accuracy = accuracy_score(Y_test, classifier.predict(X_test))\n",
    "    return {'loss': 1-accuracy, 'status': STATUS_OK}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:14] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576: \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:14] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "  0%|          | 0/20 [00:00<?, ?trial/s, best loss=?]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:15] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:       \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:15] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "  5%|▌         | 1/20 [00:00<00:06,  3.09trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:15] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:       \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:15] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 10%|█         | 2/20 [00:00<00:07,  2.53trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:16] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:       \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:16] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 15%|█▌        | 3/20 [00:01<00:06,  2.45trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:16] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:       \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:16] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 20%|██        | 4/20 [00:01<00:06,  2.53trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:16] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:       \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:16] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 30%|███       | 6/20 [00:02<00:04,  2.90trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:17] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:       \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:17] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[17:15:17] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:       \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:17] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 35%|███▌      | 7/20 [00:02<00:04,  2.95trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:17] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:       \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:17] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 40%|████      | 8/20 [00:03<00:04,  2.55trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:18] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:       \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:18] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 50%|█████     | 10/20 [00:03<00:03,  3.18trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:18] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:        \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:18] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 50%|█████     | 10/20 [00:03<00:03,  3.18trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:18] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:        \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:18] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 55%|█████▌    | 11/20 [00:03<00:02,  3.40trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:19] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:        \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:19] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 60%|██████    | 12/20 [00:04<00:02,  2.86trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:19] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:        \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:19] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 65%|██████▌   | 13/20 [00:04<00:02,  2.65trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:19] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:        \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:19] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 70%|███████   | 14/20 [00:04<00:01,  3.05trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:20] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:        \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:20] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 75%|███████▌  | 15/20 [00:05<00:01,  3.00trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:20] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:        \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:20] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 85%|████████▌ | 17/20 [00:05<00:00,  3.26trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:20] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:        \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:20] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      "[17:15:20] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:        \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:20] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 90%|█████████ | 18/20 [00:06<00:00,  3.91trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:15:21] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:576:        \n",
      "Parameters: { \"silent\" } might not be used.\n",
      "\n",
      "  This could be a false alarm, with some parameters getting used by language bindings but\n",
      "  then being mistakenly passed down to XGBoost core, or some parameter actually being used\n",
      "  but getting flagged wrongly here. Please open an issue if you find any such cases.\n",
      "\n",
      "\n",
      "[17:15:21] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n",
      " 95%|█████████▌| 19/20 [00:06<00:00,  3.24trial/s, best loss: 0.22510822510822515]"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "100%|██████████| 20/20 [00:06<00:00,  2.91trial/s, best loss: 0.22510822510822515]\n"
     ]
    }
   ],
   "source": [
    "trials=Trials()\n",
    "best=fmin(objective,space,algo=tpe.suggest,trials=trials,max_evals=20)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "{'colsample_bytree': 0.9500000000000001, 'learning_rate': 0.275, 'max_depth': 11.0, 'n_estimators': 245.0, 'subsample': 0.6000000000000001}\n"
     ]
    }
   ],
   "source": [
    "print(best)\n",
    "\n",
    "best['n_estimators']=int(best['n_estimators'])\n",
    "best['max_depth']=int(best['max_depth'])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,\n",
       "              colsample_bynode=None, colsample_bytree=0.9500000000000001,\n",
       "              enable_categorical=False, gamma=None, gpu_id=None,\n",
       "              importance_type=None, interaction_constraints=None,\n",
       "              learning_rate=0.275, max_delta_step=None, max_depth=11,\n",
       "              min_child_weight=None, missing=nan, monotone_constraints=None,\n",
       "              n_estimators=245, n_jobs=None, num_parallel_tree=None,\n",
       "              predictor=None, random_state=None, reg_alpha=None,\n",
       "              reg_lambda=None, scale_pos_weight=None,\n",
       "              subsample=0.6000000000000001, tree_method=None,\n",
       "              validate_parameters=None, verbosity=None)"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tree_v5 = XGBClassifier(**best)\n",
    "tree_v5"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/sklearn.py:1224: UserWarning: The use of label encoder in XGBClassifier is deprecated and will be removed in a future release. To remove this warning, do the following: 1) Pass option use_label_encoder=False when constructing XGBClassifier object; and 2) Encode your labels (y) as integers starting with 0, i.e. 0, 1, 2, ..., [num_class - 1].\n",
      "  warnings.warn(label_encoder_deprecation_msg, UserWarning)\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "[17:14:44] WARNING: /Users/runner/work/xgboost/xgboost/src/learner.cc:1115: Starting in XGBoost 1.3.0, the default evaluation metric used with the objective 'binary:logistic' was changed from 'error' to 'logloss'. Explicitly set eval_metric if you'd like to restore the old behavior.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,\n",
       "              colsample_bynode=1, colsample_bytree=0.75,\n",
       "              enable_categorical=False, gamma=0, gpu_id=-1,\n",
       "              importance_type=None, interaction_constraints='',\n",
       "              learning_rate=0.375, max_delta_step=0, max_depth=1,\n",
       "              min_child_weight=1, missing=nan, monotone_constraints='()',\n",
       "              n_estimators=655, n_jobs=8, num_parallel_tree=1, predictor='auto',\n",
       "              random_state=0, reg_alpha=0, reg_lambda=1, scale_pos_weight=1,\n",
       "              subsample=1.0, tree_method='exact', validate_parameters=1,\n",
       "              verbosity=None)"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tree_v5.fit(X_train, Y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy del clasificador - version 5 : 0.81\n",
      "matriz de confusión del clasificador - version 5: \n",
      " [[128  18]\n",
      " [ 26  59]]\n",
      "precision del clasificador - version 5 : 0.77\n",
      "recall del clasificador - version 5 : 0.69\n",
      "f1 del clasificador - version 5 : 0.73\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n",
      "/usr/local/lib/python3.9/site-packages/xgboost/data.py:262: FutureWarning: pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.\n",
      "  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):\n"
     ]
    }
   ],
   "source": [
    "# métricas de desempeño\n",
    "# accuracy\n",
    "print('accuracy del clasificador - version 5 : {0:.2f}'.format(accuracy_score(Y_test, tree_v5.predict(X_test))))\n",
    "# confusion matrix\n",
    "print('matriz de confusión del clasificador - version 5: \\n {0}'.format(confusion_matrix(Y_test, tree_v5.predict(X_test))))\n",
    "# precision \n",
    "print('precision del clasificador - version 5 : {0:.2f}'.format(precision_score(Y_test, tree_v5.predict(X_test))))\n",
    "# precision \n",
    "print('recall del clasificador - version 5 : {0:.2f}'.format(recall_score(Y_test, tree_v5.predict(X_test))))\n",
    "# f1\n",
    "print('f1 del clasificador - version 5 : {0:.2f}'.format(f1_score(Y_test, tree_v5.predict(X_test))))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Guardar el clasificador\n",
    "\n",
    "Python cuenta con librerias de serialización que facilitan guardar el clasificador en un archivo (pickle, joblib); este archivo puede ser restaurado para hacer predicciones."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Cree la carpeta 'clasificador' en el folder donde está el notebook\n",
    "ruta_archivo_clasificador = os.path.join('tree_v5.pkl')\n",
    "# Abrir el archivo para escribir contenido binario\n",
    "archivo_clasificador = open(ruta_archivo_clasificador, 'wb')\n",
    "# Guardar el clasificador\n",
    "pickle.dump(tree_v5, archivo_clasificador)\n",
    "# Cerrar el archivo\n",
    "archivo_clasificador.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Cargar el clasificador"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Abrir el archivo en modo lectura de contenido binario y cargar el clasificdor\n",
    "archivo_clasificador = open(ruta_archivo_clasificador, \"rb\")\n",
    "tree_v6 = pickle.load(archivo_clasificador)\n",
    "archivo_clasificador.close()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "accuracy del clasificador - version 6 : 0.79\n",
      "matriz de confusión del clasificador - version 6: \n",
      " [[126  20]\n",
      " [ 29  56]]\n",
      "precision del clasificador - version 6 : 0.74\n",
      "recall del clasificador - version 6 : 0.66\n",
      "f1 del clasificador - version 6 : 0.70\n"
     ]
    }
   ],
   "source": [
    "# métricas de desempeño\n",
    "# accuracy\n",
    "print('accuracy del clasificador - version 6 : {0:.2f}'.format(accuracy_score(Y_test, tree_v6.predict(X_test))))\n",
    "# confusion matrix\n",
    "print('matriz de confusión del clasificador - version 6: \\n {0}'.format(confusion_matrix(Y_test, tree_v6.predict(X_test))))\n",
    "# precision \n",
    "print('precision del clasificador - version 6 : {0:.2f}'.format(precision_score(Y_test, tree_v6.predict(X_test))))\n",
    "# precision \n",
    "print('recall del clasificador - version 6 : {0:.2f}'.format(recall_score(Y_test, tree_v6.predict(X_test))))\n",
    "# f1\n",
    "print('f1 del clasificador - version 6 : {0:.2f}'.format(f1_score(Y_test, tree_v6.predict(X_test))))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##  Modificar el clasificador"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "XGBClassifier(base_score=0.5, booster=None, colsample_bylevel=1,\n",
       "              colsample_bynode=1, colsample_bytree=0.9, gamma=0, gpu_id=-1,\n",
       "              importance_type='gain', interaction_constraints=None,\n",
       "              learning_rate=0.025, max_delta_step=0, max_depth=4,\n",
       "              min_child_weight=1, missing=nan, monotone_constraints=None,\n",
       "              n_estimators=700, n_jobs=0, num_parallel_tree=1,\n",
       "              objective='binary:logistic', random_state=0, reg_alpha=0,\n",
       "              reg_lambda=1, scale_pos_weight=1, subsample=0.6000000000000001,\n",
       "              tree_method=None, validate_parameters=False, verbosity=None)"
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "tree_v6.n_estimators = 700\n",
    "# Volver a entrenar el clasificador con los nuevos parámetros\n",
    "tree_v6.fit(X_train,Y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Guardar el nuevo clasificador\n",
    "ruta_archivo_clasificador = os.path.join('tree_v6.pkl')\n",
    "archivo_clasificador = open(ruta_archivo_clasificador, \"wb\")\n",
    "pickle.dump(tree_v6, archivo_clasificador)\n",
    "archivo_clasificador.close()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "##  Opciones de despliegue\n",
    "\n",
    "<img src=\"./img/35-opciones-despliegue.png\" width=\"600px\"/>"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Flask\n",
    "\n",
    "Flask es un **framework** minimalista escrito en Python que permite crear aplicaciones web rápidamente y con un mínimo número de líneas de código - **Wikipedia**.\n",
    "\n",
    "__[Flask](https://flask.palletsprojects.com/en/1.1.x/)__\n",
    "\n",
    "Ahora, utilizando el clasificador guardado anteriormente en un archivo binario, se creará un servicio API REST en Flask para poder utilizarlo. Para hacerlo funcionar hacerlo, colocar el código en un archivo .py y hacerlo correr en la consola."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# http://flask.palletsprojects.com/en/1.1.x/quickstart/#quickstart\n",
    "from flask import Flask, request, jsonify\n",
    "import os\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.model_selection import train_test_split\n",
    "from sklearn.metrics import accuracy_score\n",
    "import pickle\n",
    "\n",
    "app = Flask(__name__)\n",
    "\n",
    "classifier_filepath = os.path.join(\"tree_v6.pkl\")\n",
    "classifier_file = open(classifier_filepath, \"rb\")\n",
    "classifier = pickle.load(open(classifier_filepath, \"rb\"))\n",
    "classifier_file.close()\n",
    "\n",
    "# Desactiva el API /predict del clasificador.\n",
    "# retorna {\"message\": \"/predict disabled\"}, 200 OK\n",
    "@app.route('/disable', methods=['GET'])\n",
    "def disable():\n",
    "    global ACTIVATED\n",
    "    ACTIVATED = False\n",
    "    return {'message': '/predict disabled'}, 200\n",
    "\n",
    "# Activa el API /predict del clasificador.\n",
    "# retorna {\"message\": \"/predict enabled\"}, 200 OK\n",
    "@app.route('/enable', methods=['GET'])\n",
    "def enable():\n",
    "    global ACTIVATED\n",
    "    ACTIVATED = True\n",
    "    return {'message': '/predict enabled'}, 200\n",
    "\n",
    "# Entrena el modelo con los nuevos hyper-parámetros y retorna la nueva exactitud. Por ejemplo, {\"accuracy\": 0.81}, 200 OK\n",
    "# Se pueden enviar los siguiente hyper-parámetros: { \"n_estimators\": 10, \"criterion\": \"gini\", \"max_depth\": 7 }\n",
    "# \"criterion\" puede ser \"gini\" o \"entropy\", \"n_estimators\" y \"max_depth\" son un número entero positivo\n",
    "# Unicamente \"max_depth\" es opcional en cuyo caso se deberá emplear None. Si los otros hyper-parámetros no están presentes se retorna:\n",
    "# {\"message\": \"missing hyper-parameter\"}, 404 BAD REQUEST\n",
    "# Finalmente, sólo se puede ejecutar este endpoint después de ejecutar GET /disable. En otro caso retorna {\"message\": \"can not reset an enabled classifier\"}, 400 BAD REQUEST\n",
    "@app.route('/reset', methods=['POST'])\n",
    "def reset():\n",
    "    if ACTIVATED:\n",
    "        return {\"message\": \"can not reset an enabled classifier\"}, 400\n",
    "    json_request = request.get_json(force=True)\n",
    "    if 'criterion' not in json_request or 'n_estimators' not in json_request:\n",
    "        return {\"message\": \"missing hyper-parameter\"}, 400\n",
    "\n",
    "    classifier.n_estimators = json_request.get('n_estimators')\n",
    "    classifier.criterion = json_request.get('criterion')\n",
    "    classifier.max_depth = json_request.get('max_depth')\n",
    " \n",
    "    df = pd.read_csv(os.path.join(\"diabetes.csv\"))\n",
    "    feature_cols = ['Pregnancies', 'Insulin', 'BMI', 'Age',\n",
    "                    'Glucose', 'BloodPressure', 'DiabetesPedigreeFunction']\n",
    "    X = df[feature_cols]\n",
    "    Y = df[\"Outcome\"]\n",
    "    X_train, X_test, Y_train, Y_test = train_test_split(\n",
    "        X, Y, test_size=0.3, random_state=1)\n",
    "\n",
    "    classifier.fit(X_train, Y_train)\n",
    "    return {'accuracy': accuracy_score(Y_test, classifier.predict(X_test))}, 200\n",
    "\n",
    "# Recibe una lista de observaciones y retorna la clasificación para cada una de ellas.\n",
    "# Los valores en cada observación se corresponden con la siguientes variables:\n",
    "#['Pregnancies', 'Insulin', 'BMI', 'Age', 'Glucose', 'BloodPressure', 'DiabetesPedigreeFunction']\n",
    "# Por ejemplo: para estas observaciones:\n",
    "# [\n",
    "#\t[7,135,26.0,51,136,74,0.647],\n",
    "#\t[9,175,34.2,36,112,82,0.260]\n",
    "# ]\n",
    "@app.route('/predict', methods=['POST'])\n",
    "def predict():\n",
    "    if not ACTIVATED:\n",
    "        return {\"message\": \"classifier is not enabled\"}, 400\n",
    "    predict_request = request.get_json(force=True)\n",
    "    predict_response = classifier.predict(predict_request)\n",
    "    return {'cases': predict_request,\n",
    "            'diabetes': predict_response.tolist()}\n",
    "\n",
    "\n",
    "if __name__ == '__main__':\n",
    "    app.run(port=8080, debug=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.7"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}